*=============================
*This do file clean and process the MTurk data
*use four subfolders:
*data: store the data
*log: store the lob file
*grap: export the graphs
*output: export the tables
*=============================

*=============================
*cleanup raw data
*there are 6 sessions, corresponding to 6 treatments 
*for sessions 4-6, the participate id is assigned in the do file
* (this was done manually in the input excel file for sessions 1-3) 
*=============================



foreach i of numlist 1/6{
import excel using data/mturksession`i'.xlsx, firstrow clear
*subsession.valid_round=1 for valid decision variables
*=0 placeholders
*=2 after experiment surveys
replace subsessionvalid_round=2 if subsessionround_number==110
*remove comments
drop playercomments
drop if subsessionvalid_round==0
*sort by id and round
sort participantid_in_session subsessionround_number
gen session=`i'

*define subject id in treatment 4

replace participantid_in_session=	1	if `i'==4 & sessioncode=="11nrg4lt"
replace participantid_in_session=	2	if `i'==4 & sessioncode=="1o59e2qg"
replace participantid_in_session=	3	if `i'==4 & sessioncode=="6ytryirh"
replace participantid_in_session=	4	if `i'==4 & sessioncode=="6zp2grig"
replace participantid_in_session=	5	if `i'==4 & sessioncode=="7a3bddbw"
replace participantid_in_session=	6	if `i'==4 & sessioncode=="91xovksj"
replace participantid_in_session=	7	if `i'==4 & sessioncode=="9407eaez"
replace participantid_in_session=	8	if `i'==4 & sessioncode=="9r1u0dw0"
replace participantid_in_session=	9	if `i'==4 & sessioncode=="9s6gq1dv"
replace participantid_in_session=	10	if `i'==4 & sessioncode=="9zebf2oh"
replace participantid_in_session=	11	if `i'==4 & sessioncode=="auj698nv"
replace participantid_in_session=	12	if `i'==4 & sessioncode=="av667ir9"
replace participantid_in_session=	13	if `i'==4 & sessioncode=="ftvubcxn"
replace participantid_in_session=	14	if `i'==4 & sessioncode=="ginv3rtn"
replace participantid_in_session=	15	if `i'==4 & sessioncode=="h423x4vl"
replace participantid_in_session=	16	if `i'==4 & sessioncode=="jl18w7uc"
replace participantid_in_session=	17	if `i'==4 & sessioncode=="m0wpmn5b"
replace participantid_in_session=	18	if `i'==4 & sessioncode=="m6hvodif"
replace participantid_in_session=	19	if `i'==4 & sessioncode=="o3ji5byq"
replace participantid_in_session=	20	if `i'==4 & sessioncode=="rw4puryf"
replace participantid_in_session=	21	if `i'==4 & sessioncode=="sechb584"
replace participantid_in_session=	22	if `i'==4 & sessioncode=="sswmfqc5"
replace participantid_in_session=	23	if `i'==4 & sessioncode=="t43t3v5d"
replace participantid_in_session=	24	if `i'==4 & sessioncode=="v45j3x2a"
replace participantid_in_session=	25	if `i'==4 & sessioncode=="weop9pq7"
replace participantid_in_session=	26	if `i'==4 & sessioncode=="wpkpgich"
replace participantid_in_session=	27	if `i'==4 & sessioncode=="wyr3hd27"
replace participantid_in_session=	28	if `i'==4 & sessioncode=="x5qz0ujl"
replace participantid_in_session=	29	if `i'==4 & sessioncode=="xkp2hmnz"
replace participantid_in_session=	30	if `i'==4 & sessioncode=="zra7mpvk"

*define subject id in treatment 5

replace participantid_in_session=	1	if `i'==5 & sessioncode=="1s729upy"
replace participantid_in_session=	2	if `i'==5 & sessioncode=="31st3skl"
replace participantid_in_session=	3	if `i'==5 & sessioncode=="341eem4k"
replace participantid_in_session=	4	if `i'==5 & sessioncode=="3asdmf92"
replace participantid_in_session=	5	if `i'==5 & sessioncode=="4swn1g50"
replace participantid_in_session=	6	if `i'==5 & sessioncode=="4yu9l50r"
replace participantid_in_session=	7	if `i'==5 & sessioncode=="51q29uxo"
replace participantid_in_session=	8	if `i'==5 & sessioncode=="545te0zu"
replace participantid_in_session=	9	if `i'==5 & sessioncode=="58jv9yvb"
replace participantid_in_session=	10	if `i'==5 & sessioncode=="63w2mgt5"
replace participantid_in_session=	11	if `i'==5 & sessioncode=="99a71jhc"
replace participantid_in_session=	12	if `i'==5 & sessioncode=="9ogxh8oj"
replace participantid_in_session=	13	if `i'==5 & sessioncode=="a7n1imh0"
replace participantid_in_session=	14	if `i'==5 & sessioncode=="bbmzv64q"
replace participantid_in_session=	15	if `i'==5 & sessioncode=="dhnt2019"
replace participantid_in_session=	16	if `i'==5 & sessioncode=="gqdrr1xr"
replace participantid_in_session=	17	if `i'==5 & sessioncode=="jav3tfou"
replace participantid_in_session=	18	if `i'==5 & sessioncode=="kmrqacq1"
replace participantid_in_session=	19	if `i'==5 & sessioncode=="kokzjx64"
replace participantid_in_session=	20	if `i'==5 & sessioncode=="m2c0jr7p"
replace participantid_in_session=	21	if `i'==5 & sessioncode=="nd694dho"
replace participantid_in_session=	22	if `i'==5 & sessioncode=="owdsjqsb"
replace participantid_in_session=	23	if `i'==5 & sessioncode=="qmjtmdsc"
replace participantid_in_session=	24	if `i'==5 & sessioncode=="sb9k85f4"
replace participantid_in_session=	25	if `i'==5 & sessioncode=="setkiagb"
replace participantid_in_session=	26	if `i'==5 & sessioncode=="wi39s8nb"
replace participantid_in_session=	27	if `i'==5 & sessioncode=="xaj5cv22"
replace participantid_in_session=	28	if `i'==5 & sessioncode=="yhm7w04h"
replace participantid_in_session=	29	if `i'==5 & sessioncode=="zrt5kodq"
replace participantid_in_session=	30	if `i'==5 & sessioncode=="zy9orz85"

*define subject id in treatment 6
replace participantid_in_session=	1	if `i'==6 & sessioncode=="3lvdfxoe"
replace participantid_in_session=	2	if `i'==6 & sessioncode=="3xtk86by"
replace participantid_in_session=	3	if `i'==6 & sessioncode=="9pv3yt7x"
replace participantid_in_session=	4	if `i'==6 & sessioncode=="a2xnlazx"
replace participantid_in_session=	5	if `i'==6 & sessioncode=="cublkulk"
replace participantid_in_session=	6	if `i'==6 & sessioncode=="di5m3uz8"
replace participantid_in_session=	7	if `i'==6 & sessioncode=="dn9uuvzb"
replace participantid_in_session=	8	if `i'==6 & sessioncode=="ejm37zni"
replace participantid_in_session=	9	if `i'==6 & sessioncode=="er2ulfc1"
replace participantid_in_session=	10	if `i'==6 & sessioncode=="fe9ev2r8"
replace participantid_in_session=	11	if `i'==6 & sessioncode=="gvkn8ey1"
replace participantid_in_session=	12	if `i'==6 & sessioncode=="ha400com"
replace participantid_in_session=	13	if `i'==6 & sessioncode=="jbqdxhaf"
replace participantid_in_session=	14	if `i'==6 & sessioncode=="jp8ybph8"
replace participantid_in_session=	15	if `i'==6 & sessioncode=="jwprhm63"
replace participantid_in_session=	16	if `i'==6 & sessioncode=="khdevnfw"
replace participantid_in_session=	17	if `i'==6 & sessioncode=="kvc2lm4k"
replace participantid_in_session=	18	if `i'==6 & sessioncode=="leenwedm"
replace participantid_in_session=	19	if `i'==6 & sessioncode=="mdmtlrzn"
replace participantid_in_session=	20	if `i'==6 & sessioncode=="mgzekf9u"
replace participantid_in_session=	21	if `i'==6 & sessioncode=="n0vtif8f"
replace participantid_in_session=	22	if `i'==6 & sessioncode=="nnx8ti9t"
replace participantid_in_session=	23	if `i'==6 & sessioncode=="nwe4jk09"
replace participantid_in_session=	24	if `i'==6 & sessioncode=="pysxpdnk"
replace participantid_in_session=	25	if `i'==6 & sessioncode=="qz6jmgt6"
replace participantid_in_session=	26	if `i'==6 & sessioncode=="x0d8ksrr"
replace participantid_in_session=	27	if `i'==6 & sessioncode=="x1h8hcpb"
replace participantid_in_session=	28	if `i'==6 & sessioncode=="xd8dhj8e"
replace participantid_in_session=	29	if `i'==6 & sessioncode=="yc8f95mm"
replace participantid_in_session=	30	if `i'==6 & sessioncode=="ycyfkmpd"


*deal with inconsistent coding of the FL question 3 in the raw data
if (`i'==3) | (`i'==6) {
replace playerMC_answer_3="True" if playerMC_answer_3=="1"
replace playerMC_answer_3="False" if playerMC_answer_3=="0"
}


if (`i'==4) | (`i'==5) {
gen temp=""
replace temp="True" if playerMC_answer_3==1
replace temp="False" if playerMC_answer_3==0
drop playerMC_answer_3
gen playerMC_answer_3=temp
drop temp
}

tab participantid_in_session
tab playerMC_answer_3
drop if subsessionround_number==.
save data/mturksession`i', replace
}


*=============================
*process the data
*=============================


cap log close
log using log/analysis_mturk, replace
use data/mturksession1, clear
foreach i of numlist 2/6{
append using data/mturksession`i', force
}
append using data/model, force


*assigning treatments
*data 
*=1, tda, =2 no-tda old tax
* =3 no tda new tax 
* =4, tda no education
* =5 no tda with CA first
*=6, no tda with CA consumption first
gen sessiontype=session
*add information to the model data
replace sessiontype=11 if (participant_current_app_name=="tda_model")
replace sessiontype=12 if (participant_current_app_name=="oldtax_model")
replace sessiontype=13 if (participant_current_app_name=="newtax_model")

replace participantid_in_session=1 if sessiontype==11
replace participantid_in_session=1 if sessiontype==12
replace participantid_in_session=1 if sessiontype==13



tab sessiontype if subsessionround_number==110

*generate treatment dummies
gen t1=(sessiontype==1)
gen t2=(sessiontype==2)
gen t3=(sessiontype==3)
gen t4=(sessiontype==4)
gen t5=(sessiontype==5)
gen t6=(sessiontype==6)
*treatment effects 
global treatmentlist="t2 t3 t4 t5 t6"
*treatment effects for contribution rate only
global treatmentlist_ca="t4 t5 t6"


*assign theory predictions to treatments and models
*tda
gen treatment=1 
*non-tda old tax
replace treatment=2 if (sessiontype==2 | sessiontype==5 | sessiontype==6 | sessiontype==12)
*non-tda new tax
replace treatment=3 if (sessiontype==3 | sessiontype==13)
*id number
gen id=sessiontype*100+participantid_in_session
*assign sequence to model predictions
replace subsessionsequence=1 if sessiontype>=11
gen id_sequence=id*10+subsessionsequence


*generate survey response summary
* number of corrected crt questions
gen temp=((playeranswer_1==4)+(playeranswer_2==29)+(playeranswer_3==20)+(playeranswer_4=="lost money in the stock market")) if subsessionround_number==110
bysort id: egen crt=mean(temp)
drop temp

* number of correct financial literacy questions
gen temp=((playerMC_answer_1=="More than $102")+(playerMC_answer_2=="Less than today")+(playerMC_answer_3=="False")) if subsessionround_number==110
bysort id: egen financialliteracy=mean(temp)
drop temp

*number of A choices
*risk averse indicator
gen temp=playerrisk_answer_01+playerrisk_answer_02+playerrisk_answer_03+playerrisk_answer_04+playerrisk_answer_05+playerrisk_answer_06/*
*/+playerrisk_answer_07+playerrisk_answer_08+playerrisk_answer_09+playerrisk_answer_10-10 
replace temp=10-temp 
replace temp=. if subsessionround_number~=110
bysort id: egen riskaverse=mean(temp)
drop temp

* not monotonic indicator
gen temp=0 
replace temp=1 if playerrisk_answer_09>playerrisk_answer_10
foreach i of numlist 8(-1)1 {
replace temp=1 if playerrisk_answer_0`i'>playerrisk_answer_0`=`i'+1'
}
replace temp=. if subsessionround_number~=110
bysort id: egen riskaverse_ab=mean(temp)
drop temp

*demographic features
gen temp=(playergender=="Female")
replace temp=. if subsessionround_number~=110
bysort id: egen female=mean(temp)
drop temp

gen temp=1 /*white*/
replace temp=2 if (playerrace=="African American or Black")
replace temp=3 if (playerrace=="Hispanic or Latinx")
replace temp=4 if (playerrace=="Asian")
replace temp=. if subsessionround_number~=110
bysort id: egen race=mean(temp)
drop temp


*for constructing demographic features
gen temp=(playerrace=="White")
replace temp=. if subsessionround_number~=110
bysort id: egen race_white=mean(temp)
drop temp
gen temp=(playerrace=="Asian")
replace temp=. if subsessionround_number~=110
bysort id: egen race_asian=mean(temp)
drop temp
gen temp=(playerincome=="$0 to $25,000")
replace temp=. if subsessionround_number~=110
bysort id: egen income_0=mean(temp)
drop temp
gen temp=(playerincome=="$25,001 to $50,000")
replace temp=. if subsessionround_number~=110
bysort id: egen income_25=mean(temp)
drop temp
gen temp=((playereducation=="College degree")|(playereducation=="Graduate or professional degree"))
replace temp=. if subsessionround_number~=110
bysort id: egen education_high=mean(temp)
drop temp
gen temp=playerage
replace temp=. if subsessionround_number~=110
bysort id: egen age=mean(temp)
drop temp


*remove questionaire data sequence
drop if subsessionround_number==110


*define after tax income
replace playerafter_tax_income=playertaxable_income-playertax_paid if sessiontype>=11
*add capital gains taxes to tax payed
replace playertax_paid=playertax_paid+playercapital_gains_tax_paid if sessiontype==3




*change the unit
*in dollar 
*cumulative payoff each sequence assuming has not ended
replace playertotal_payoff=playertotal_payoff/100
*period payoff 
replace playerpayoff=playerpayoff/100 


*in thousands
replace playerconsumption=playerconsumption/1000
replace playerbalance_RA=playerbalance_RA/1000
replace playerbalance_TDA=playerbalance_TDA/1000
replace playerbalance_CA=playerbalance_CA/1000

replace playertaxable_income=playertaxable_income /1000
replace  playertax_paid=playertax_paid/1000
replace playerafter_tax_income=playerafter_tax_income/1000
*different variables are used to record contribution amount in T4-T6
*resolve this inconsistency in variable names
replace playercontributionTDA=0 if playercontributionTDA==.
replace playercontributionCA=0 if playercontributionCA==.
replace playercontributionTDA=playercontributionTDA/1000
replace playercontributionCA=playercontributionCA/1000
replace playercontribution=playercontribution/1000
replace playercontribution=playercontributionTDA if sessiontype==4 
replace playercontribution=playercontributionCA if sessiontype==5 | sessiontype==6
replace playercontribution=0 if playercontribution==.

replace playerendowment=playerendowment/1000


*net worth
replace playerbalance_TDA=0 if playerbalance_TDA==.
replace playerbalance_CA=0 if  playerbalance_CA==.

gen networth=playerbalance_RA+playerbalance_TDA+playerbalance_CA


*average and mean TDA balance at period 15
sum playerbalance_TDA if subsessionround_in_sequence==15 & sessiontype==1, d
sum playerbalance_TDA if subsessionround_in_sequence==15 & sessiontype==4, d
sum playerbalance_CA if subsessionround_in_sequence==15 & sessiontype==5, d
sum playerbalance_CA if subsessionround_in_sequence==15 & sessiontype==6, d

*find out CA account usage
sum playerbalance_CA if subsessionround_in_sequence==15 & sessiontype==5, d
sum playerbalance_CA if subsessionround_in_sequence==15 & sessiontype==6, d
list id if playerbalance_CA==0 & subsessionround_in_sequence==15 & sessiontype==5
list id if playerbalance_CA==0 & subsessionround_in_sequence==15 & sessiontype==6




tsset id_sequence subsessionround_in_sequence
*calculate total tax savings from TDA (both contribution and accumulated interests)
gen playertaxable_income_before=playertaxable_income+playercontribution if (sessiontype==1 | sessiontype==4 | sessiontype==11) & subsessionround_in_sequence==1
replace playertaxable_income_before=playertaxable_income+playercontribution+L.playerbalance_TDA*0.1 if (sessiontype==1 | sessiontype==4 | sessiontype==11) & subsessionround_in_sequence>1
gen playertax_paid_before=. 
replace playertax_paid_before=0 if (sessiontype==1 | sessiontype==4 | sessiontype==11)
replace playertax_paid_before=0.1*(playertaxable_income_before-21) if (sessiontype==1 | sessiontype==4 | sessiontype==11) & playertaxable_income_before>21
replace playertax_paid_before=0.15*(playertaxable_income_before-39)+1.8 if (sessiontype==1 | sessiontype==4 | sessiontype==11) & playertaxable_income_before>39
replace playertax_paid_before=0.25*(playertaxable_income_before-96)+10.35 if (sessiontype==1 | sessiontype==4 | sessiontype==11) & playertaxable_income_before>96
gen tax_exp=.
replace tax_exp=playertax_paid_before-playertax_paid

*calculate tax savings from TDA contribution alone
gen temp1=playertaxable_income+playercontribution if (sessiontype==1 | sessiontype==4 | sessiontype==11) 
gen temp2=. 
replace temp2=0 if (sessiontype==1 | sessiontype==4 | sessiontype==11)
replace temp2=0.1*(temp1-21) if (sessiontype==1 | sessiontype==4 | sessiontype==11) & temp1>21
replace temp2=0.15*(temp1-39)+1.8 if (sessiontype==1 | sessiontype==4 | sessiontype==11) & temp1>39
replace temp2=0.25*(temp1-96)+10.35 if (sessiontype==1 | sessiontype==4 | sessiontype==11) & temp1>96
gen tdactaxsave=temp2-playertax_paid
replace tdactaxsave=0 if tdactaxsave==.
*total savings
gen totalsaving=playerafter_tax_income-playerconsumption
replace totalsaving=playercontribution-tdactaxsave+totalsaving if (sessiontype==1 | sessiontype==4 | sessiontype==11) 
drop temp1 temp2

replace totalsaving=. if subsessionround_in_sequence>15
*revised after tax income adding TDA contributions and deducting tax savings on TDA contribution 
gen revised_after_tax_income=playerafter_tax_income
replace revised_after_tax_income=playerafter_tax_income+playercontribution-tdactaxsave if (sessiontype==1 | sessiontype==4 | sessiontype==11) 
*contribution rate
gen contribution2endowment=playercontribution/playerendowment
replace contribution2endowment=. if subsessionround_in_sequence>15
gen contribution2aftertaxinc=playercontribution/revised_after_tax_income
replace contribution2aftertaxinc=. if subsessionround_in_sequence>15

replace contribution2endowment=. if (sessiontype==2) |   (sessiontype==3) | (sessiontype==12) | (sessiontype==13)   
replace contribution2aftertaxinc=. if (sessiontype==2) |   (sessiontype==3) | (sessiontype==12) | (sessiontype==13)   
*total saving rate
gen totalsaving2endowment=totalsaving/playerendowment
gen totalsaving2aftertaxinc=totalsaving/revised_after_tax_income

sum contribution2endowment if sessiontype==5
sum contribution2endowment if sessiontype==6



*generate revised TDA balances b* 
*assuming subjects pay taxes on TDA contribution and accumulated interests. 
gen revised_playerbalance_TDA=0
replace revised_playerbalance_TDA=playerbalance_TDA-tax_exp if subsessionround_in_sequence==1 &  (sessiontype==1 | sessiontype==4 | sessiontype==11)
foreach i of numlist 2/35{
gen temp1=playertaxable_income+playercontribution+L.revised_playerbalance_TDA*0.1 if (sessiontype==1 | sessiontype==4 | sessiontype==11) & subsessionround_in_sequence==`i'
gen temp2=. 
replace temp2=0 if (sessiontype==1 | sessiontype==4 | sessiontype==11)
replace temp2=0.1*(temp1-21) if (sessiontype==1 | sessiontype==4 | sessiontype==11) & temp1>21
replace temp2=0.15*(temp1-39)+1.8 if (sessiontype==1 | sessiontype==4 | sessiontype==11) & temp1>39
replace temp2=0.25*(temp1-96)+10.35 if (sessiontype==1 | sessiontype==4 | sessiontype==11) & temp1>96
replace revised_playerbalance_TDA=L.revised_playerbalance_TD*1.1+playercontribution-(temp2-playertax_paid) if subsessionround_in_sequence==`i' &  (sessiontype==1 | sessiontype==4 | sessiontype==11) & playertaxable_income>0
drop temp1 temp2
}


*ending round for each session
bysort id_sequence: egen endinground=total(subsessionpaid_round) if  subsessionround_in_sequence>=1 & subsessionround_in_sequence<=35 




*theretical survival rate for each model period (the probabiltiy each period is the last period)
gen survprob=1
foreach i of numlist 16/35{
replace survprob=(0.9)^(`i'-16) if subsessionround_in_sequence==`i'
}



*generate theory predicted consumption levels
sort sessiontype session participantid_in_session subsessionsequence subsessionround_in_sequence
gen temp=playerconsumption if sessiontype>=10
bysort treatment subsessionround_in_sequence: egen consumption_theory=mean(temp)
drop temp
*deviation measures
gen consumption_dev=playerconsumption-consumption_theory
gen con_dev2=(playerconsumption-consumption_theory)^2
gen con_per_dev2=((playerconsumption-consumption_theory)/consumption_theory)^2*100


*5 period average values: 1 periods 1-5, 6 p 6-10, 11 p11-15, 16 p16-20, 21 p21-25, 26 p26-30, 31 p31-35
global varlist periodlist="1 6 11 16 21"

foreach i of numlist 1(5)31{
bysort id_sequence : egen mse`i'=mean(con_dev2) if subsessionround_in_sequence>=`i' & subsessionround_in_sequence<`=`i'+5'
bysort id_sequence : egen mspe`i'=mean(con_per_dev2) if subsessionround_in_sequence>=`i' & subsessionround_in_sequence<`=`i'+5'
bysort id_sequence : egen con`i'=mean(playerconsumption) if subsessionround_in_sequence>=`i' & subsessionround_in_sequence<`=`i'+5'
bysort id_sequence : egen networth`i'=mean(networth) if subsessionround_in_sequence>=`i' & subsessionround_in_sequence<`=`i'+5'
}

*first 25 period average values
bysort id_sequence : egen mse25=mean(con_dev2) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=25
bysort id_sequence : egen mspe25=mean(con_per_dev2) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=25
bysort id_sequence : egen con25=mean(playerconsumption) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=25
bysort id_sequence : egen networth25=mean(networth) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=25

*five period average
foreach i of numlist 1(5)11{
bysort id_sequence : egen totalsaving2endowment`i'=mean(totalsaving2endowment) if subsessionround_in_sequence>=`i' & subsessionround_in_sequence<`=`i'+5'
bysort id_sequence : egen totalsaving2aftertaxinc`i'=mean(totalsaving2aftertaxinc) if subsessionround_in_sequence>=`i' & subsessionround_in_sequence<`=`i'+5'
bysort id_sequence : egen contribution2endowment`i'=mean(contribution2endowment) if subsessionround_in_sequence>=`i' & subsessionround_in_sequence<`=`i'+5'
bysort id_sequence : egen contribution2aftertaxinc`i'=mean(contribution2aftertaxinc) if subsessionround_in_sequence>=`i' & subsessionround_in_sequence<`=`i'+5'
}
*average of first 15 periods

bysort id_sequence : egen totalsaving2endowment15=mean(totalsaving2endowment) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=15
bysort id_sequence : egen totalsaving2aftertaxinc15=mean(totalsaving2aftertaxinc) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=15
bysort id_sequence : egen contribution2endowment15=mean(contribution2endowment) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=15
bysort id_sequence : egen contribution2aftertaxinc15=mean(contribution2aftertaxinc) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=15


*maximum number of rounds with decision data
bysort id_sequence: egen maxround=max(subsessionround_in_sequence) 

* 35 period average for the subsample with valid data for all 35 periods
bysort id_sequence : egen mse35=mean(con_dev2) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=35 & maxround==35
bysort id_sequence : egen mspe35=mean(con_per_dev2) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=35 & maxround==35
bysort id_sequence : egen con35=mean(playerconsumption) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=35 & maxround==35
bysort id_sequence : egen networth35=mean(networth) if subsessionround_in_sequence>=1 & subsessionround_in_sequence<=35 & maxround==35

*define per period payoff for model 
tsset id_sequence subsessionround_in_sequence
foreach j of numlist 11/13{
	replace playerpayoff= playertotal_payoff if  subsessionround_in_sequence==1 & sessiontype==`j'
	foreach i of numlist 2/35{
	replace playerpayoff= playertotal_payoff-L.playertotal_payoff if  subsessionround_in_sequence==`i'	 &  sessiontype==`j'
	}
}

*total payoff for 5 periods weighted by survival proba
foreach i of numlist 1(5)31{
bysort id_sequence : egen payoff`i'=total(playerpayoff*survprob) if subsessionround_in_sequence>=`i' & subsessionround_in_sequence<`=`i'+5'
}

*average CA interest in treatment 5
sum playerCA_interest if sessiontype==5 & subsessionround_in_sequence>=11 & subsessionround_in_sequence<=15

*payoff from the first 25 periods (only)

bysort id_sequence: egen payoff24=total(playerpayoff*survprob) if  subsessionround_in_sequence>=1 & subsessionround_in_sequence<=25
*lifetime utility, add the average payoff for the last 10 periods for subjects who end at period 25
bysort id_sequence: egen payoff25=total(playerpayoff*survprob) if  subsessionround_in_sequence>=1 & subsessionround_in_sequence<=35
foreach i of numlist 1/6{
	sum payoff26 if sessiontype==`i'
	replace payoff25=payoff25+r(mean) if sessiontype==`i' & maxround==25
	sum payoff31 if sessiontype==`i'
	replace payoff25=payoff25+r(mean) if sessiontype==`i' & maxround==25
	
}


*=============================
*control variables
*=============================


gen subsessionsequence2=(subsessionsequence==2)
global control="crt financialliteracy riskaverse female subsessionsequence2"


*  summary statistics about demographics

local varlist2="female age race_white race_asian income_0 income_25 education_high crt financialliteracy riskaverse participantpayoff"

estpost summ `varlist2' if subsessionround_in_sequence==1 & sessiontype<10 & subsessionsequence==1, d
estout using "output/summary_mturk.xls",  cells("count mean sd p10 p25 p50 p75 p90") title("all alternative statistics") replace

*=============================
*regression analysis
*the number of regressions are more than that reported in the paper.
*regressions used to construct tables reported in the paper and appendix are noted
*each regression result includes 
*the estimated coefficient const and t2-t6
*the treatment difference diffxx and sdxx
*the difference in theory predictions mean21 mean31 mean32
*=============================
*mean squared percent errors 

est clear
foreach i of numlist 1 (5)21 25 26 31 35{
reg mspe`i'  $treatmentlist $control if subsessionround_in_sequence==`i' & sessiontype<10, vce(cluster id)
foreach k of numlist 2/5{
	foreach m of numlist `=`k'+1'/6{
lincom t`m'-t`k'
estadd scalar diff`m'`k' r(estimate)
estadd scalar sd`m'`k' r(se)
} 
}
est store mspe`i'
}


esttab mspe* using "output/result_mturk.csv", /*
*/  keep(_cons $treatmentlist $control) replace cells(b(star fmt(%9.2f)) se(par fmt(%9.2f))) star( * 0.10 ** 0.05 *** 0.010)  /*
*/ stats(r2_a N diff32 sd32 diff42 sd42 diff52 sd52 diff62 sd62 diff43 sd43 diff53 sd53 diff63 sd63 diff54 sd54 diff64 sd64 diff65 sd65  , fmt(%9.2f %9.0g ) labels(R-squared N T3-T2 sd T4-T2 sd T5-T2 sd T6-T2 sd T4-T3 sd T5-T3 sd T6-T3 sd T5-T4 sd T6-T4 sd T6-T5 sd))  collabels(none) title(" mspe")

*mean square erros 

est clear
foreach i of numlist 1 (5)21 25 26 31 35{
reg mse`i'  $treatmentlist $control if subsessionround_in_sequence==`i' & sessiontype<10, vce(cluster id)
foreach k of numlist 2/5{
	foreach m of numlist `=`k'+1'/6{
lincom t`m'-t`k'
estadd scalar diff`m'`k' r(estimate)
estadd scalar sd`m'`k' r(se)
} 
}
est store mse`i'
}

esttab mse* using "output/result_mturk.csv", /*
*/  keep(_cons $treatmentlist $control) append cells(b(star fmt(%9.2f)) se(par fmt(%9.2f))) star( * 0.10 ** 0.05 *** 0.010)  /*
*/ stats(r2_a N diff32 sd32 diff42 sd42 diff52 sd52 diff62 sd62 diff43 sd43 diff53 sd53 diff63 sd63 diff54 sd54 diff64 sd64 diff65 sd65  , fmt(%9.2f %9.0g ) labels(R-squared N T3-T2 sd T4-T2 sd T5-T2 sd T6-T2 sd T4-T3 sd T5-T3 sd T6-T3 sd T5-T4 sd T6-T4 sd T6-T5 sd))  collabels(none) title(" mse")

*contribution rate

est clear
 foreach i of numlist 1(5)11 15{
reg contribution2endowment`i'  $treatmentlist $control if subsessionround_in_sequence==`i' & sessiontype<10, vce(cluster id)
foreach k of numlist 2/5{
	foreach m of numlist `=`k'+1'/6{
lincom t`m'-t`k'
estadd scalar diff`m'`k' r(estimate)
estadd scalar sd`m'`k' r(se)
} 
}
est store contribution2endowment`i'
}


*saving rate

 foreach i of numlist 1(5)11 15{
reg totalsaving2endowment`i'  $treatmentlist $control if subsessionround_in_sequence==`i' & sessiontype<10, vce(cluster id)
foreach k of numlist 2/5{
	foreach m of numlist `=`k'+1'/6{
lincom t`m'-t`k'
estadd scalar diff`m'`k' r(estimate)
estadd scalar sd`m'`k' r(se)
} 
}
foreach j of numlist 1/3{
sum totalsaving2endowment`i' if subsessionround_in_sequence==`i' & sessiontype==`=`j'+10'
local mean`j'=r(mean)
}
estadd scalar mean21=`mean2'-`mean1'
estadd scalar mean31=`mean3'-`mean1'
estadd scalar mean32=`mean3'-`mean2'

est store totalsaving2endowment`i'
}





esttab contribution2endowment* totalsaving2endowment* using "output/result_mturk.csv", /*
*/  keep(_cons $treatmentlist $control) append cells(b(star fmt(%9.2f)) se(par fmt(%9.2f))) star( * 0.10 ** 0.05 *** 0.010)  /*
*/ stats(r2_a N diff32 sd32 diff42 sd42 diff52 sd52 diff62 sd62 diff43 sd43 diff53 sd53 diff63 sd63 diff54 sd54 diff64 sd64 diff65 sd65  mean21 mean31 mean32 , fmt(%9.2f %9.0g ) labels(R-squared N T3-T2 sd T4-T2 sd T5-T2 sd T6-T2 sd T4-T3 sd T5-T3 sd T6-T3 sd T5-T4 sd T6-T4 sd T6-T5 sd T2-T1 T3-T1 T3-T2))  collabels(none) title("to endowment ratio")

*consumption levels

 est clear
foreach i of numlist 1(5)21 25 26 31 35{
reg con`i'  $treatmentlist $control if subsessionround_in_sequence==`i' & sessiontype<10, vce(cluster id)
foreach k of numlist 2/5{
	foreach m of numlist `=`k'+1'/6{
lincom t`m'-t`k'
estadd scalar diff`m'`k' r(estimate)
estadd scalar sd`m'`k' r(se)
} 
}
foreach j of numlist 1/3{
sum con`i' if subsessionround_in_sequence==`i' & sessiontype==`=`j'+10'
local mean`j'=r(mean)
}
estadd scalar mean21=`mean2'-`mean1'
estadd scalar mean31=`mean3'-`mean1'
estadd scalar mean32=`mean3'-`mean2'
est store con`i'
}

esttab con* using "output/result_mturk.csv", /*
*/  keep(_cons $treatmentlist $control) append cells(b(star fmt(%9.2f)) se(par fmt(%9.2f))) star( * 0.10 ** 0.05 *** 0.010)  /*
*/ stats(r2_a N diff32 sd32 diff42 sd42 diff52 sd52 diff62 sd62 diff43 sd43 diff53 sd53 diff63 sd63 diff54 sd54 diff64 sd64 diff65 sd65  mean21 mean31 mean32 , fmt(%9.2f %9.0g ) labels(R-squared N T3-T2 sd T4-T2 sd T5-T2 sd T6-T2 sd T4-T3 sd T5-T3 sd T6-T3 sd T5-T4 sd T6-T4 sd T6-T5 sd T2-T1 T3-T1 T3-T2))  collabels(none) title(" consumption")




*average 5 period payoff, lifetime utility payoff25, cumulative payment for first 25 period payoff24

foreach i of numlist 1(5)21 24 26 31 25 {
reg payoff`i'  $treatmentlist $control if subsessionround_in_sequence==`i' & sessiontype<10, vce(cluster id)
foreach k of numlist 2/5{
	foreach m of numlist `=`k'+1'/6{
lincom t`m'-t`k'
estadd scalar diff`m'`k' r(estimate)
estadd scalar sd`m'`k' r(se)
} 
}
foreach j of numlist 1/3{
sum payoff`i' if subsessionround_in_sequence==`i' & sessiontype==`=`j'+10'
local mean`j'=r(mean)
}
estadd scalar mean21=`mean2'-`mean1'
estadd scalar mean31=`mean3'-`mean1'
estadd scalar mean32=`mean3'-`mean2'

est store payoff`i'
}


esttab payoff* using "output/result_mturk.csv", /*
*/  keep(_cons $treatmentlist $control) append cells(b(star fmt(%9.2f)) se(par fmt(%9.2f))) star( * 0.10 ** 0.05 *** 0.010)  /*
*/ stats(r2_a N diff32 sd32 diff42 sd42 diff52 sd52 diff62 sd62 diff43 sd43 diff53 sd53 diff63 sd63 diff54 sd54 diff64 sd64 diff65 sd65  mean21 mean31 mean32, fmt(%9.2f %9.0g %9.2f %9.2f %9.0g %9.2f) labels( R-squared N T3-T2 sd T4-T2 sd T5-T2 sd T6-T2 sd T4-T3 sd T5-T3 sd T6-T3 sd T5-T4 sd T6-T4 sd T6-T5 sd T2-T1 T3-T1 T3-T2))  collabels(none) title(" payoff")

est clear





gen revised_networth=playerbalance_RA+revised_playerbalance_TDA+playerbalance_CA
replace revised_networth=networth if sessiontype~=1 & sessiontype~=4 & sessiontype~=11

*decomposing the difference in retirement wealth across treatments

gen level=.
gen se=.

gen level_model=0

reg networth t1 if subsessionround_in_sequence==15 & (sessiontype==1 | sessiontype==2), vce(cluster id)
replace level=_b[t1] in 1
replace se=_se[t1] in 1
reg networth t1 if subsessionround_in_sequence==15 & (sessiontype==1 | sessiontype==4), vce(cluster id)
replace level=_b[t1] in 2
replace se=_se[t1] in 2
reg revised_networth t4 if subsessionround_in_sequence==15 & (sessiontype==4 | sessiontype==5), vce(cluster id)
replace level=_b[t4] in 4
replace se=_se[t4] in 4
reg networth t5 if subsessionround_in_sequence==15 & (sessiontype==5 | sessiontype==6), vce(cluster id)
replace level=_b[t5] in 5
replace se=_se[t5] in 5
reg networth t6 if subsessionround_in_sequence==15 & (sessiontype==6 | sessiontype==2) ,vce(cluster id)
replace level=_b[t6] in 6
replace se=_se[t6] in 6
preserve
keep if subsessionround_in_sequence==15 & sessiontype==4 
keep networth revised_networth id_sequence id
rename networth networth1
rename revised_networth networth2
reshape long networth, i(id id_sequence) j(original)
replace original=0 if original==2
reg networth original , vce(cluster id)
local temp1=_b[original]
local temp2=_se[original]
restore
replace level=`temp1' in 3
replace se=`temp2' in 3

sum networth  if subsessionround_in_sequence==15 & sessiontype==11
local t1mean=r(mean)
sum revised_networth  if subsessionround_in_sequence==15 & sessiontype==11
local revised_t1mean=r(mean)
sum networth  if subsessionround_in_sequence==15 & sessiontype==12
local t2mean=r(mean)

replace level_model=0
replace level_model=`t1mean'-`t2mean' in 1
replace level_model=`t1mean'-`revised_t1mean' in 3
replace level_model=`revised_t1mean'-`t2mean' in 4

br level se level_model

 

*graphs




*=============================
*App Figure C2 
*=============================
preserve
global varlist=" playertotal_payoff playerconsumption playerbalance_RA playerbalance_TDA playerbalance_CA playercontribution  revised_playerbalance_TDA "
collapse $varlist , by(sessiontype subsessionround_in_sequence)
drop if subsessionround_in_sequence==55 /*record the final questionnaire*/
replace playerbalance_TDA=0 if playerbalance_TDA==.
replace playerbalance_CA=0 if playerbalance_CA==.
replace revised_playerbalance_TDA=0 if revised_playerbalance_TDA==.

gen networth=playerbalance_RA+playerbalance_TDA+playerbalance_CA
gen revised_networth=playerbalance_RA+revised_playerbalance_TDA+playerbalance_CA
replace revised_networth=networth if sessiontype~=1 & sessiontype~=4 & sessiontype~=11
global varlist="playerconsumption networth "
label var playertotal_payoff "Cumulative earnings"
label var playerconsumption "Consumption (000)"
label var  networth "Total networth (000)"
label var playerbalance_RA "RA balance"
label var playerbalance_TDA "TDA balance (000)"
label var subsessionround_in_sequence "Period"
label var  revised_networth "Revised total networth (000)"
keep if subsessionround_in_sequence<=25 /*record the final questionnaire*/



foreach stub of global varlist{
twoway (line `stub' subsessionround_in_sequence if sessiontype==1 ,lcolor(blue) lwidth(2) ) /*
*/(line `stub' subsessionround_in_sequence if sessiontype==2,  lcolor(red) lwidth(1) ) /*
*/ (line `stub' subsessionround_in_sequence if sessiontype==3,  lcolor(green) lwidth(0.2) ) /*
*/ (line `stub' subsessionround_in_sequence if sessiontype==4 , lp(dash_dot) lcolor(blue) lwidth(2) ) /*
*/ (line `stub' subsessionround_in_sequence if sessiontype==5 , lp(dash_dot) lcolor(red) lwidth(1) ) /*
*/ (line `stub' subsessionround_in_sequence if sessiontype==6 , lp(shortdash) lcolor(red) lwidth(1) ) /*
*/(line `stub' subsessionround_in_sequence if sessiontype==11 , lp(dot) lcolor(blue) lwidth(2)) /*
*/ (line `stub' subsessionround_in_sequence if sessiontype==12, lp(dot) lcolor(red) lwidth(1)) /*
*/  (line `stub' subsessionround_in_sequence if sessiontype==13, lp(dot) lcolor(green) lwidth(0.2)) /*
*/, xline(16) legend(order(1 "T1" 2 "T2" 3 "T3" 4 "T4"  5 "T5" 6 "T6" 7 "T1/T4 model" 8 "T2/T5/T6 model" 9 "T3 model") rows(3) size(*0.8) symxsize(*0.8))   name(`stub', replace)
graph save graph/`stub'_mturk, replace
graph export graph/`stub'_mturk.pdf, replace
}

grc1leg playerconsumption networth
graph export graph/meancomparison_mturk.pdf, replace


restore

*=============================
*App Figure C3 (b)
*=============================
*median graph
preserve
global varlist=" playertotal_payoff playerconsumption playerbalance_RA playerbalance_TDA playerbalance_CA playercontribution revised_playerbalance_TDA "
collapse (median) $varlist , by(sessiontype subsessionround_in_sequence)
**# Bookmark #1
drop if subsessionround_in_sequence==55 /*record the final questionnaire*/
replace playerbalance_TDA=0 if playerbalance_TDA==.
replace playerbalance_CA=0 if playerbalance_CA==.
replace revised_playerbalance_TDA=0 if revised_playerbalance_TDA==.

gen networth=playerbalance_RA+playerbalance_TDA+playerbalance_CA
gen revised_networth=playerbalance_RA+revised_playerbalance_TDA+playerbalance_CA
replace revised_networth=networth if sessiontype~=1 & sessiontype~=4 & sessiontype~=11
global varlist="playerconsumption networth "
label var playertotal_payoff "Cumulative earnings"
label var playerconsumption "Consumption (000)"
label var  networth "Total networth (000)"
label var playerbalance_RA "RA balance"
label var playerbalance_TDA "TDA balance (000)"
label var subsessionround_in_sequence "Period"
label var  revised_networth "Revised total networth (000)"
keep if subsessionround_in_sequence<=25 /*record the final questionnaire*/



foreach stub of global varlist{
twoway (line `stub' subsessionround_in_sequence if sessiontype==1 ,lcolor(blue) lwidth(2) ) /*
*/(line `stub' subsessionround_in_sequence if sessiontype==2,  lcolor(red) lwidth(1) ) /*
*/ (line `stub' subsessionround_in_sequence if sessiontype==3,  lcolor(green) lwidth(0.2) ) /*
*/ (line `stub' subsessionround_in_sequence if sessiontype==4 , lp(dash_dot) lcolor(blue) lwidth(2) ) /*
*/ (line `stub' subsessionround_in_sequence if sessiontype==5 , lp(dash_dot) lcolor(red) lwidth(1) ) /*
*/ (line `stub' subsessionround_in_sequence if sessiontype==6 , lp(shortdash) lcolor(red) lwidth(1) ) /*
*/(line `stub' subsessionround_in_sequence if sessiontype==11 , lp(dot) lcolor(blue) lwidth(2)) /*
*/ (line `stub' subsessionround_in_sequence if sessiontype==12, lp(dot) lcolor(red) lwidth(1)) /*
*/  (line `stub' subsessionround_in_sequence if sessiontype==13, lp(dot) lcolor(green) lwidth(0.2)) /*
*/, xline(16) legend(order(1 "T1" 2 "T2" 3 "T3" 4 "T4"  5 "T5" 6 "T6" 7 "T1/T4 model" 8 "T2/T5/T6 model" 9 "T3 model") rows(3) size(*0.8) symxsize(*0.8))   name(`stub', replace)
graph save graph/`stub'_median_mturk, replace
graph export graph/`stub'_median_mturk.pdf, replace
}

grc1leg playerconsumption networth
graph export graph/mediancomparison_mturk.pdf, replace

restore



